{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from pandas import Series, DataFrame"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Beyond 1\n",
    "\n",
    "Create a data frame from four other columns (`VendorID`, `trip_distance`, `tip_amount`, and `total_amount`), specifying the `dtype` for each. What types are most appropriate? Can you use them directly, or must you first clean the data?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv('../data/nyc_taxi_2020-01.csv',\n",
    "                usecols=['VendorID', 'trip_distance', 'tip_amount', 'total_amount'],\n",
    "                dtype={'VendorID':np.float16, 'trip_distance':np.float16, 'tip_amount':np.float16,\n",
    "                      'total_amount':np.float16})\n",
    "\n",
    "df = df.dropna().copy()\n",
    "df.loc['VendorID'] = df['VendorID'].astype(np.int8)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Beyond 2\n",
    "\n",
    "Instead of removing `NaN` values from the `VendorID` column, set it to a new value, 3. How does that affect your specifications and cleaning of the data?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = pd.read_csv('../data/nyc_taxi_2020-01.csv',\n",
    "            usecols=['VendorID', 'trip_distance', 'tip_amount', 'total_amount'],\n",
    "            dtype={'VendorID':float16, 'trip_distance':float16, 'tip_amount':float16,\n",
    "                  'total_amount':float16})\n",
    "\n",
    "df.loc['VendorID'] = df['VendorID'].fillna(3)\n",
    "df['VendorID'] = df['VendorID'].astype(int8)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Beyond 3\n",
    "\n",
    "We'll talk more about this in future chapters, but the `memory_usage` method allows you to see how much memory is being used by each column in a data frame. It returns a series of integers, in which the index lists the columns and the values represent the memory used by each column. Compare the memory used by the data frame with `float16` (which you've already used) and when you use `float64` instead for the final three columns."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Memory usage with float16\n",
    "df.memory_usage().sum()  "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "ename": "IntCastingNaNError",
     "evalue": "Cannot convert non-finite values (NA or inf) to integer",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mIntCastingNaNError\u001b[0m                        Traceback (most recent call last)",
      "Cell \u001b[0;32mIn [8], line 7\u001b[0m\n\u001b[1;32m      1\u001b[0m df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m../data/nyc_taxi_2020-01.csv\u001b[39m\u001b[38;5;124m'\u001b[39m,\n\u001b[1;32m      2\u001b[0m             usecols\u001b[38;5;241m=\u001b[39m[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVendorID\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtrip_distance\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtip_amount\u001b[39m\u001b[38;5;124m'\u001b[39m, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtotal_amount\u001b[39m\u001b[38;5;124m'\u001b[39m],\n\u001b[1;32m      3\u001b[0m             dtype\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVendorID\u001b[39m\u001b[38;5;124m'\u001b[39m:np\u001b[38;5;241m.\u001b[39mfloat16, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtrip_distance\u001b[39m\u001b[38;5;124m'\u001b[39m:np\u001b[38;5;241m.\u001b[39mfloat64, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtip_amount\u001b[39m\u001b[38;5;124m'\u001b[39m:np\u001b[38;5;241m.\u001b[39mfloat64,\n\u001b[1;32m      4\u001b[0m                   \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtotal_amount\u001b[39m\u001b[38;5;124m'\u001b[39m:np\u001b[38;5;241m.\u001b[39mfloat64})\n\u001b[1;32m      6\u001b[0m df\u001b[38;5;241m.\u001b[39mloc[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVendorID\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVendorID\u001b[39m\u001b[38;5;124m'\u001b[39m]\u001b[38;5;241m.\u001b[39mfillna(\u001b[38;5;241m3\u001b[39m)\n\u001b[0;32m----> 7\u001b[0m df[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mVendorID\u001b[39m\u001b[38;5;124m'\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mVendorID\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mint8\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/pandas/core/generic.py:6240\u001b[0m, in \u001b[0;36mNDFrame.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m   6233\u001b[0m     results \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m   6234\u001b[0m         \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39miloc[:, i]\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[1;32m   6235\u001b[0m         \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns))\n\u001b[1;32m   6236\u001b[0m     ]\n\u001b[1;32m   6238\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   6239\u001b[0m     \u001b[38;5;66;03m# else, only a single dtype is given\u001b[39;00m\n\u001b[0;32m-> 6240\u001b[0m     new_data \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_mgr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mastype\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   6241\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_constructor(new_data)\u001b[38;5;241m.\u001b[39m__finalize__(\u001b[38;5;28mself\u001b[39m, method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mastype\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m   6243\u001b[0m \u001b[38;5;66;03m# GH 33113: handle empty frame or series\u001b[39;00m\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/pandas/core/internals/managers.py:445\u001b[0m, in \u001b[0;36mBaseBlockManager.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m    444\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mastype\u001b[39m(\u001b[38;5;28mself\u001b[39m: T, dtype, copy: \u001b[38;5;28mbool\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m, errors: \u001b[38;5;28mstr\u001b[39m \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m T:\n\u001b[0;32m--> 445\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mapply\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mastype\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/pandas/core/internals/managers.py:347\u001b[0m, in \u001b[0;36mBaseBlockManager.apply\u001b[0;34m(self, f, align_keys, ignore_failures, **kwargs)\u001b[0m\n\u001b[1;32m    345\u001b[0m         applied \u001b[38;5;241m=\u001b[39m b\u001b[38;5;241m.\u001b[39mapply(f, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m    346\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 347\u001b[0m         applied \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mgetattr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mb\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mf\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    348\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mTypeError\u001b[39;00m, \u001b[38;5;167;01mNotImplementedError\u001b[39;00m):\n\u001b[1;32m    349\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m ignore_failures:\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/pandas/core/internals/blocks.py:526\u001b[0m, in \u001b[0;36mBlock.astype\u001b[0;34m(self, dtype, copy, errors)\u001b[0m\n\u001b[1;32m    508\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    509\u001b[0m \u001b[38;5;124;03mCoerce to the new dtype.\u001b[39;00m\n\u001b[1;32m    510\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    522\u001b[0m \u001b[38;5;124;03mBlock\u001b[39;00m\n\u001b[1;32m    523\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    524\u001b[0m values \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mvalues\n\u001b[0;32m--> 526\u001b[0m new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array_safe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    528\u001b[0m new_values \u001b[38;5;241m=\u001b[39m maybe_coerce_values(new_values)\n\u001b[1;32m    529\u001b[0m newb \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmake_block(new_values)\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:299\u001b[0m, in \u001b[0;36mastype_array_safe\u001b[0;34m(values, dtype, copy, errors)\u001b[0m\n\u001b[1;32m    296\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m values\u001b[38;5;241m.\u001b[39mcopy()\n\u001b[1;32m    298\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 299\u001b[0m     new_values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    300\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (\u001b[38;5;167;01mValueError\u001b[39;00m, \u001b[38;5;167;01mTypeError\u001b[39;00m):\n\u001b[1;32m    301\u001b[0m     \u001b[38;5;66;03m# e.g. astype_nansafe can fail on object-dtype of strings\u001b[39;00m\n\u001b[1;32m    302\u001b[0m     \u001b[38;5;66;03m#  trying to convert to float\u001b[39;00m\n\u001b[1;32m    303\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:230\u001b[0m, in \u001b[0;36mastype_array\u001b[0;34m(values, dtype, copy)\u001b[0m\n\u001b[1;32m    227\u001b[0m     values \u001b[38;5;241m=\u001b[39m values\u001b[38;5;241m.\u001b[39mastype(dtype, copy\u001b[38;5;241m=\u001b[39mcopy)\n\u001b[1;32m    229\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 230\u001b[0m     values \u001b[38;5;241m=\u001b[39m \u001b[43mastype_nansafe\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    232\u001b[0m \u001b[38;5;66;03m# in pandas we don't store numpy str dtypes, so convert to object\u001b[39;00m\n\u001b[1;32m    233\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(dtype, np\u001b[38;5;241m.\u001b[39mdtype) \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(values\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mtype, \u001b[38;5;28mstr\u001b[39m):\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:140\u001b[0m, in \u001b[0;36mastype_nansafe\u001b[0;34m(arr, dtype, copy, skipna)\u001b[0m\n\u001b[1;32m    137\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcannot astype a timedelta from [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00marr\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m] to [\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m]\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    139\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m np\u001b[38;5;241m.\u001b[39missubdtype(arr\u001b[38;5;241m.\u001b[39mdtype, np\u001b[38;5;241m.\u001b[39mfloating) \u001b[38;5;129;01mand\u001b[39;00m is_integer_dtype(dtype):\n\u001b[0;32m--> 140\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_astype_float_to_int_nansafe\u001b[49m\u001b[43m(\u001b[49m\u001b[43marr\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    142\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m is_object_dtype(arr\u001b[38;5;241m.\u001b[39mdtype):\n\u001b[1;32m    143\u001b[0m \n\u001b[1;32m    144\u001b[0m     \u001b[38;5;66;03m# if we have a datetime/timedelta array of objects\u001b[39;00m\n\u001b[1;32m    145\u001b[0m     \u001b[38;5;66;03m# then coerce to a proper dtype and recall astype_nansafe\u001b[39;00m\n\u001b[1;32m    147\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m is_datetime64_dtype(dtype):\n",
      "File \u001b[0;32m/usr/local/lib/python3.11/site-packages/pandas/core/dtypes/astype.py:182\u001b[0m, in \u001b[0;36m_astype_float_to_int_nansafe\u001b[0;34m(values, dtype, copy)\u001b[0m\n\u001b[1;32m    178\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    179\u001b[0m \u001b[38;5;124;03mastype with a check preventing converting NaN to an meaningless integer value.\u001b[39;00m\n\u001b[1;32m    180\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    181\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m np\u001b[38;5;241m.\u001b[39misfinite(values)\u001b[38;5;241m.\u001b[39mall():\n\u001b[0;32m--> 182\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m IntCastingNaNError(\n\u001b[1;32m    183\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCannot convert non-finite values (NA or inf) to integer\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    184\u001b[0m     )\n\u001b[1;32m    185\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m dtype\u001b[38;5;241m.\u001b[39mkind \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mu\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[1;32m    186\u001b[0m     \u001b[38;5;66;03m# GH#45151\u001b[39;00m\n\u001b[1;32m    187\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m (values \u001b[38;5;241m>\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m)\u001b[38;5;241m.\u001b[39mall():\n",
      "\u001b[0;31mIntCastingNaNError\u001b[0m: Cannot convert non-finite values (NA or inf) to integer"
     ]
    }
   ],
   "source": [
    "df = pd.read_csv('../data/nyc_taxi_2020-01.csv',\n",
    "            usecols=['VendorID', 'trip_distance', 'tip_amount', 'total_amount'],\n",
    "            dtype={'VendorID':np.float16, 'trip_distance':np.float64, 'tip_amount':np.float64,\n",
    "                  'total_amount':np.float64})\n",
    "\n",
    "df.loc['VendorID'] = df['VendorID'].fillna(3)\n",
    "df['VendorID'] = df['VendorID'].astype(np.int8)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# Memory usage with float64\n",
    "df.memory_usage().sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# float64 uses about 3.5x the memory as float16!\n",
    "160125328 / 44835184"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
